# NOTE: the workspace is deliberately not cleared here. `rm(list = ls())`
# deletes the caller's objects whenever this file is sourced, yet leaves
# loaded packages and options in place, so it does not provide a clean
# state. Run the script in a fresh R session instead.

library(tidyr)
library(dplyr)
library(ggplot2)
library(ggthemes)
library(tidyverse)
library(reshape2)
library(readr)

# Read the predicted keyword-relevance set, then:
#   * drop every column whose name starts with 'x' (tidyselect's
#     starts_with() ignores case by default, so 'X...' columns go too);
#   * keep only the rows where Predicted equals 1
#     (presumably the paragraphs classified as border-relevant -- confirm).
borders_unga_speeches <- read_csv('kw_relevance_predicted_set.csv')
borders_unga_speeches <- filter(
  select(borders_unga_speeches, -starts_with('x')),
  Predicted == 1
)

# Regular expressions searched for (case-insensitively) in each paragraph.
# Each pattern string is also used verbatim as the name of the logical flag
# column it produces, so the strings must stay in sync with the summary step.
# NOTE(review): 'guerilla' does not match the spelling "guerrilla" -- confirm
# that this is intended before changing the pattern.
terms <- c(
  'crim|smuggl|traffic',
  'migra|refugee',
  'demarc',
  'terroris|rebel|guerilla|non-state',
  'state',
  'international',
  'war',
  'disease|pandemic|infection|health|vaccin|virus'
  # , 'ethnic|minorit'
)

##########################################################
# COMPARING WAR/TERRORISM/ETC PARAGRAPH BORDER FREQUENCY #
##########################################################
# Load the full UNGA plenary file; strings are kept as character vectors.
full_unga_speeches <- read.csv(
  file = 'UNGAplenary_tiles_v5.csv',
  stringsAsFactors = FALSE
)

# For every search pattern, add a logical column (named after the pattern
# itself) that is TRUE when the row's `text` matches it, ignoring case.
for (term in terms) {
  full_unga_speeches[[term]] <- grepl(
    pattern = term,
    x = full_unga_speeches$text,
    ignore.case = TRUE
  )
}

# Flag each row of the full data set that also appears in the filtered
# border set, matching on (plenary_doc_id, doc_id).
#
# The lookup keys are de-duplicated first: a left join repeats a left-hand
# row once per matching right-hand row, so duplicate keys in the border set
# would otherwise duplicate paragraphs and inflate the per-year shares
# computed from this table.
full_unga_speeches <- full_unga_speeches %>%
  left_join(
    borders_unga_speeches %>%
      distinct(plenary_doc_id, doc_id) %>%
      mutate(border_relevant = TRUE),
    by = c('plenary_doc_id', 'doc_id')
  ) %>%
  # Rows without a match come back as NA; treat them as not border-relevant.
  mutate(border_relevant = coalesce(border_relevant, FALSE))

# Share of rows matching each term pattern, per year and per border_relevant
# flag. The flag columns are logical, so mean() gives the proportion of TRUE
# values (the same quantity as sum(flag) / n()).
#
# The output column names double as the facet titles of the figure built
# from this table. `.groups = "drop"` returns an ungrouped tibble; without
# it the result would silently stay grouped by `year`.
term_data <- full_unga_speeches %>%
  group_by(year, border_relevant) %>%
  dplyr::summarise(
    Crime = mean(`crim|smuggl|traffic`),
    Migration = mean(`migra|refugee`),
    Disease = mean(`disease|pandemic|infection|health|vaccin|virus`),
    Terrorism = mean(`terroris|rebel|guerilla|non-state`),
    Demarcation = mean(demarc),
    State = mean(state),
    International = mean(international),
    War = mean(war),
    .groups = "drop"
  )

# Figure D1: smoothed yearly share of rows matching each term, one facet per
# term, with separate curves (by colour) for border_relevant TRUE vs FALSE.
term_data %>%
  # Long format: every column except year..border_relevant becomes a
  # (name, value) pair.
  pivot_longer(cols = !(year:border_relevant)) %>%
  ggplot(mapping = aes(x = year, y = value, colour = border_relevant)) +
  geom_smooth() +
  facet_wrap(~name, ncol = 2) +
  # Minimal look: no grid or panel fill, black axis lines, legend below.
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    axis.line = element_line(colour = "black"),
    legend.position = 'bottom'
  )



